{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "┌ Info: Precompiling Knet [1902f260-5fb4-5aff-8c31-6271790ab950]\n",
      "└ @ Base loading.jl:1186\n",
      "WARNING: Method definition gcnode(AutoGrad.Node) in module AutoGrad at /home/gridsan/dyuret/.julia/dev/AutoGrad/src/core.jl:38 overwritten in module Knet at /home/gridsan/dyuret/.julia/dev/Knet/src/gcnode.jl:18.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Main.REINFORCE_DISCRETE"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the discrete-action example script; the recorded result shows it evaluates to the module Main.REINFORCE_DISCRETE.\n",
    "include(\"reinforce_discrete.jl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "julia reinforce_discrete.jl\n",
       "\n",
       "This example implements the REINFORCE algorithm from `Simple statistical gradient-following algorithms for connectionist reinforcement learning.`,  Williams, Ronald J. Machine learning, 8(3-4):229–256, 1992. This example also demonstrates the usage of the `@zerograd` function for stopping the gradient flow.\n"
      ],
      "text/plain": [
       "  julia reinforce_discrete.jl\n",
       "\n",
       "  This example implements the REINFORCE algorithm from \u001b[36mSimple statistical gradient-following algorithms for connectionist reinforcement learning.\u001b[39m,\n",
       "  Williams, Ronald J. Machine learning, 8(3-4):229–256, 1992. This example also demonstrates the usage of the \u001b[36m@zerograd\u001b[39m function for stopping the\n",
       "  gradient flow."
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the docstring attached to the REINFORCE_DISCRETE module.\n",
    "@doc REINFORCE_DISCRETE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "usage: <PROGRAM> [--env_id ENV_ID] [--episodes EPISODES]\n",
      "                 [--gamma GAMMA] [--threshold THRESHOLD] [--lr LR]\n",
      "                 [--render] [--usegpu]\n",
      "\n",
      "(c) Ozan Arkan Can, 2018. Demonstration of the REINFORCE algorithm on\n",
      "the discrete action space.\n",
      "\n",
      "optional arguments:\n",
      "  --env_id ENV_ID       environment name (default: \"CartPole-v1\")\n",
      "  --episodes EPISODES   number of episodes (type: Int64, default: 20)\n",
      "  --gamma GAMMA         doscount factor (type: Float64, default: 0.99)\n",
      "  --threshold THRESHOLD\n",
      "                        stop the episode even it is not terminal after\n",
      "                        number of steps exceeds the threshold (type:\n",
      "                        Int64, default: 1000)\n",
      "  --lr LR               learning rate (type: Float64, default: 0.01)\n",
      "  --render              render the environment\n",
      "  --usegpu              use GPU or not\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Print the command-line usage of main, including every option and its default value.\n",
    "REINFORCE_DISCRETE.main(\"--help\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 1 , total rewards: 15.0\n",
      "episode 2 , total rewards: 10.0\n",
      "episode 3 , total rewards: 13.0\n",
      "episode 4 , total rewards: 12.0\n",
      "episode 5 , total rewards: 22.0\n"
     ]
    }
   ],
   "source": [
    "# Short demo run: 5 episodes instead of the default 20; all other options keep the defaults listed by --help.\n",
    "# NOTE(review): no random seed is set in this notebook, so the rewards presumably vary between runs; confirm before comparing outputs.\n",
    "REINFORCE_DISCRETE.main(\"--episodes 5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Main.REINFORCE_CONTINUOUS"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the continuous-action example script; the recorded result shows it evaluates to the module Main.REINFORCE_CONTINUOUS.\n",
    "include(\"reinforce_continuous.jl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "usage: <PROGRAM> [--env_id ENV_ID] [--episodes EPISODES]\n",
      "                 [--gamma GAMMA] [--threshold THRESHOLD] [--lr LR]\n",
      "                 [--render] [--hidden HIDDEN] [--usegpu]\n",
      "\n",
      "(c) Ozan Arkan Can, 2018. Demonstration of the REINFORCE algorithm on\n",
      "the continuous action space.\n",
      "\n",
      "optional arguments:\n",
      "  --env_id ENV_ID       environment name (default:\n",
      "                        \"MountainCarContinuous-v0\")\n",
      "  --episodes EPISODES   number of episodes (type: Int64, default: 100)\n",
      "  --gamma GAMMA         discount factor (type: Float64, default: 0.9)\n",
      "  --threshold THRESHOLD\n",
      "                        stop the episode even it is not terminal after\n",
      "                        number of steps exceeds the threshold (type:\n",
      "                        Int64, default: 1000)\n",
      "  --lr LR               learning rate (type: Float64, default: 0.001)\n",
      "  --render              render the environment\n",
      "  --hidden HIDDEN       hidden units (type: Int64, default: 120)\n",
      "  --usegpu              use GPU or not\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Print the command-line usage of main, including every option and its default value.\n",
    "REINFORCE_CONTINUOUS.main(\"--help\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 1 , total rewards: -203.32223495311246\n",
      "episode 2 , total rewards: -198.91641245616063\n",
      "episode 3 , total rewards: -185.8963724304913\n",
      "episode 4 , total rewards: -191.35332615055856\n",
      "episode 5 , total rewards: -189.83679064392982\n"
     ]
    }
   ],
   "source": [
    "# Short demo run: 5 episodes instead of the default 100; all other options keep the defaults listed by --help.\n",
    "# NOTE(review): no random seed is set in this notebook, so the rewards presumably vary between runs; confirm before comparing outputs.\n",
    "REINFORCE_CONTINUOUS.main(\"--episodes 5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 1.0.0",
   "language": "julia",
   "name": "julia-1.0"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.0.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
